Steps: 1. Install the required Packages when necessary 2. Adjust settings: Path 3. Read the provided 0.3 Files needed 4. Read in the Functions file 0.4 5. Skip section 1 RNAseq completely, it has been done

Zander Spational Analysis

0 Global Setup

0.1 Path

before you start: .rs.restartR()

GitClonePath <- "/Users/admin/Desktop/ElbeEstuarineZander"
setwd(GitClonePath)

0.2 Packages

0.3 Files needed

#############################
#Feature Counts Output Files#
SL_Gill_cnt <- read.table(file.path(path_Input_RNA, "featureCounts_SL_Gill_counts.txt"), sep="\t", stringsAsFactors=FALSE, header = T)

SL_Liver_cnt <- read.table(file.path(path_Input_RNA, "featureCounts_SL_Liver_counts.txt"), sep="\t", stringsAsFactors=FALSE, header = T)

##################################################
#AnnotationFile Created in Sections 1.1.3 - 1.1.5#
SLUCGeneManual <- read.csv2(file.path(path_Input_RNA, "SLUCGene-Manual-17.06.2023.csv"))[-1]
SLUCOrgDb<- readRDS(file.path(path_Input_RNA, "SLUCOrgDb-2022.rds"))
IDS<- read.table(file.path(path_Input_RNA, "ids_gene_ids.tsv"), header = FALSE, sep="\t")
#############
#Sample File#
SAMDF<- read.table(file=file.path(path_Input_RNA, "SL_samplefile_04.01.2024.csv") ,sep=";",dec = ".")

0.4 Functions

#-

1 Import RNAseq data

1.1 Counts & create TPM

1.2 Ribosomal Counts filtered Counts & create TPM

Some comments on rRNA removal from poly-A enriched libraries, some rRNA genes were identified differentially expressed and were intially removed by this step

https://www.biostars.org/p/419845/ There’s no reason to bother removing rRNA if (like most people) you’re not quantifying it later. Usually one just looks at the percentage of reads in feature (e.g., with multiQC on the featureCounts output) and excludes outlier samples. That won’t tell you that a sample was an outlier due to rRNA contamination, but that’s rarely actionable information in and of itself (you’d still want to see it as an outlier in PCA). No, if you don’t care about them then you should remove the counts from the matrix. Otherwise you’re needless inflating the tests you’re doing and deflating your power. The normalization should be robust to their presence, but if there’s a LOT of rRNA contamination in one sample then that tends to cause issues with the normalization factors.

https://www.biostars.org/p/415008/ A properly depleted RNAseq should have less then 10% rRNA in it (as little as 2-3%). Normally the rRNA would be over 80-90%, thus your data already is depleted. While using the preprocessed reads, ribosomal RNA (rRNA) is removed with SortMeRNA v2.1b [22] when considering all of the available databases for https://www.genenames.org/data/genegroup/#!/group/848

1.3 Merge AnnotationDBs

Sander genename matched with Danio and Human, writing Human Symbol where possible, writing full Genename else, LOC where uncharacterized.

This is an elaborate process to get everything working and the output is provided so dont run

1.4 Blasts

Include Blast results in the Annotation file

1.5 Manual Curation

The Output from multiple blasts was manually curated by checking LOCnumbers on ncbi, blasting sequences manually taking only hits with query cover >80%, percent Ident >80% and significant E value

#-

2 Setup

#-

2.1 Physiology

2.1.1 Variable Boxplots

Boxplots of different physiological measurements

- Figure 2 D

# ID    Name    Abbreviation    Lat Long    Description
# 1.1   Medemgrund  MG  53.8363 8.88777 Ekm 712 close to inlet of Medem
# 1.2   Brunsbüttel BB  53.88742    9.19429 Ekm 694 close to Brunsbüttel Ports
# 1.3   Schwarztonnensand   SWS 53.71442    9.46976 Ekm 665 east of Schwarztonnensand on territorium of NDS
# 1.4   Twielenfleth    TW  53.60921    9.56536 Ekm 651 close to Twielenfleth Leuchtfeuer
# 1.5   Mühlenberger Loch   ML  53.54907    9.82338 Ekm 633 upfront Airbus Area



SAMDFSL         <- filter(SAMDF, Species == "SL")
SAMDFSL <- SAMDFSL[SAMDFSL$SAMPLE %in% Samples_SL, ]
SAMDFSL <- SAMDFSL %>% mutate(Ekm = case_when(
                    (Loc== "Medem Grund") ~ "MG Ekm-713", 
                    (Loc== "Brunsbuettel") ~ "BB Ekm-692", 
                    (Loc== "Schwarztonnen Sand") ~ "SS Ekm-665", 
                    (Loc== "Muehlenberger Loch") ~ "ML Ekm-633")
                    )
Ekm_labels <- as_labeller(c(
  'Medem Grund'="MG Ekm-713",
  'Brunsbuettel'="BB Ekm-692",
  'Schwarztonnen Sand'="SS Ekm-665",
  'Muehlenberger Loch'="ML Ekm-633"
))
samdf <- list("SAMDFSL" = SAMDFSL)
Variables<- c("Length", "Weight","Age", "HSI", "SSI", "FultonK") 
for (i in names(samdf)[grepl("SAMDFSL", names(samdf))]){
  require(EnvStats)
  require(ggrepel)
  require(cowplot)
  tryCatch({
  LocOrder=c("Medem Grund", "Brunsbuettel", "Schwarztonnen Sand", "Muehlenberger Loch")
  g       <- paste(i) 
  gg      <- sub('SAMDF', '', g)
  for (ii in Variables) {
    tryCatch({
    FILENAME    <- paste(paste(save_name, Type, gg, ii, "Boxplot", sep="_"),".png", sep="")
    
    ggg     <- paste(ii) 
    
    p <-ggplot(samdf[[i]], aes(x=Loc,y=samdf[[i]][[ii]],colour=fReplicates)) +
    geom_boxplot(aes(fill=samdf[[i]]$Species)) + atheme + #awhite2 + # geom_jitter(width=0.2) +
    scale_color_manual(values= ggplot2::alpha(col.Palette[[gg]], 1)) +    
    scale_fill_manual(values= ggplot2::alpha(col.Palette$col.Palette.species[[gg]], 0.4)) +
    geom_text_repel(aes(label = SampleID), data = samdf[[i]],   size=3)  +
    facet_grid(. ~ factor(fLoc, levels=LocOrder), labeller = Ekm_labels, drop=TRUE,scale="free",space="free_x") +
    labs(x = "", y = noquote(paste(ggg, " [", noquote(Units[ii]), "]", sep = ""))) + #"")
    #ggtitle(paste(gg,  ggg, "over all sampling sites and seasons (Summer, Winter, Spring,)"))
    theme(legend.position = "none") +
    guides(fill=guide_legend(title="Species")) +
    stat_n_text(size = 4, color = "grey20")+ #white
    theme(
         panel.background = element_rect(fill='transparent'),
         plot.background = element_rect(fill='transparent', color=NA),
         #panel.grid.major = element_blank(),
         #panel.grid.minor = element_blank(),
         legend.background = element_rect(fill='transparent'),
         legend.box.background = element_rect(fill='transparent') ) 
    g <- ggplot_gtable(ggplot_build(p))
    stripr <- which(grepl('strip-t', g$layout$name))
    fills <- ggplot2::alpha(col.Palette$col.Palette.LOC, 0.4)
    prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
    title <- ggdraw() + draw_label_themeRK(paste("Sander Lucioperca", ggg, "per Location"), element = "plot.title",x = 0.05, hjust = 0,     vjust = 0.5) #draw_label_themeRKwhite
    #subtitle <- ggdraw() + draw_label_themeRKwhite(paste(), 
    #                       element ="plot.subtitle",x = 0.05, hjust = 0, vjust = 1)
    #quartz()
    AAA<- cowplot::plot_grid(title, prow, ncol = 1, rel_heights = c(0.05, 0.98))
    plot(AAA)
    ggsave(AAA, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 4.5,
    height = 4)
    }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}

2.1.2 Physio PCA

PCAplot of different physiological and abiotics factors as overview

##########################
#Zander Physiological PCA#
##########################
library(factoextra)
library(cowplot)
dat1 <- SAMDF[,names(SAMDF) %in% c("Species", "SampleID", "Loc", "Salinity", "O2", "Temperature", "SecchiDepth", "HSI",  "FultonK", "Weight", "Length", "SSI")] # , 
rownames(dat1) <- dat1$SampleID 
dat1 <- na.omit(dat1)
res.pca <- prcomp(dat1[,-which(names(dat1) %in% c("Species", "Loc", "SampleID"))],  scale = TRUE)
p <- fviz_eig(res.pca, addlabels = TRUE, ylim = c(0, 50), linecolor = "white",) +
      theme_minimal() + awhite + theme(axis.title.x = element_blank()) +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
      prow <- cowplot::plot_grid(p, labels = c(""), ncol = 1)
      A<- cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
      plot(A)

      ggsave(A, filename = paste(paste(save_name, Type, "PCA-eig", unique(dat1$Species), sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 4,
      height = 4)
      
p <- fviz_pca_biplot(res.pca, 
                label="all", 
                habillage=dat1$Loc,
                addEllipses=TRUE, ellipse.level=0.95, ellipse.type = "confidence",
                palette = col.Palette$col.Palette.Locs,  legend.title = "Seasons", 
                repel = TRUE, 
                col.var = "red", alpha.var = 1, col.quanti.sup = "blue") +
                labs(title ="Bplit PCA of Physiological and Abiotic data") +
                theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
                theme(
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank()) 
      prow <- cowplot::plot_grid(p, labels = c(""), ncol = 1)
      A<- cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
      plot(A)

      ggsave(A, filename = paste(paste(save_name, Type, "Physio-PCA", unique(dat1$Species), sep="_"),".png", sep=""), path = pathPlots , device='png',
             dpi=300, width = 10,height = 10)  

#-

3 Differential Expression

3.1 DESeq2

no Age + Sex as all Individuals are juveniles of LS1

tpmLiver <- readRDS(file.path(path_Output_RNA, "SL_Liver_featurecounts_tpm_NoRibo_08.01.24.rds"))
cntLiver <- readRDS(file.path(path_Output_RNA, "SL_Liver_featurecounts_countsNoRibo_08.01.24.rds"))

tpmGill <- readRDS(file.path(path_Output_RNA, "SL_Gill_featurecounts_tpm_NoRibo_08.01.24.rds"))
cntGill <- readRDS(file.path(path_Output_RNA, "SL_Gill_featurecounts_countsNoRibo_08.01.24.rds"))

cntLiver   <- cntLiver[,Samples_RNA_Liver] 
cntGill    <- cntGill[,Samples_RNA_Gill] 
#TPM Data
tpmLiver   <- tpmLiver[,Samples_RNA_Liver] 
tpmGill    <- tpmGill[,Samples_RNA_Gill]

library(DESeq2)
ddsLiver         <- DESeqDataSetFromMatrix(countData = cntLiver,
                              colData = SAMDF_RNA_Liver,
                              design = as.formula(paste("~",variable)))

ddsGill          <- DESeqDataSetFromMatrix(countData = cntGill,
                              colData = SAMDF_RNA_Gill,
                              design = as.formula(paste("~",variable)))

#vst-Transformation
vstGill   <- vst(ddsGill, blind = FALSE)
vstLiver  <- vst(ddsLiver, blind = FALSE)

#DESeq2
ddsGill  <- DESeq(ddsGill)
ddsLiver <- DESeq(ddsLiver)

resGill  <- results(ddsGill)
resLiver <- results(ddsLiver)


for (LFCThreshold in LFCThresholds) {
  print(paste("LFCThreshold", LFCThreshold))
  
  name <-paste0(Species, sep = "")
  assign(paste0(Species, sep = ""), list())
  .GlobalEnv[[name]] <-list("vstLiver"= vstLiver, 
                          "vstGill"=  vstGill,  
                          "ddsLiver"= ddsLiver, 
                          "ddsGill"=  ddsGill,
                          "tpmLiver"= tpmLiver, 
                          "tpmGill" = tpmGill)

  for (i in names(.GlobalEnv[[name]])[grepl("dds", names(.GlobalEnv[[name]]))]) {
  require(DESeq2)
  require(tidyverse)
  require(plyr)
  require(dplyr)
    

  a       <- length(.GlobalEnv[[name]])
  g       <- paste(i)
  gg      <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", i)
  name2   <- .GlobalEnv[[name]][names(.GlobalEnv[[name]])[grepl(paste(gg), names(.GlobalEnv[[name]]))]]
  vst     <- as.data.frame(assay(name2[[names(name2)[grepl("vst", names(name2))]]]))
  
  #Selection of right Samplefile
  if (gg == "Gill" ) {
  SAMDF <- SAMDF_RNA_Gill } else {SAMDF <- SAMDF_RNA_Liver}
  
  #Selection of Comparisons
  VariableOrder<-VariableOrder
  A <- as.data.frame(t(combn(SAMDF %>% 
  arrange(factor(Replicates, levels = VariableOrder)) %>% 
  pull(paste(variable)) %>% 
  unique(),2)))
  A$V3<-Reduce(function(...) paste(..., sep = "-"), A)
  
  mylist <- list() #create an empty list
  for (i in 1:nrow(A)) {
  res <- results(.GlobalEnv[[name]][[g]], lfcThreshold = LFCThreshold, alpha=0.05, contrast=c(variable,A[i,1],A[i,2]))
  mylist[[i]] <- res 
  names(mylist)[[i]] <- A[i,3]}
  
  mylist2 <-list()
  for (x in names(mylist)) {
  mylist2[[x]]<-dplyr::left_join(rownames_to_column(as.data.frame(mylist[[x]])),  
                          rownames_to_column(as.data.frame(vst[rownames(vst) %in%
                          rownames(as.data.frame(mylist[[x]])),])))

  mylist2 <- lapply(mylist2, function(y) {y[which(y$padj < alpha),]})}
  
  name2 <-paste0("res", gg, sep = "")
  assign(paste0("res", gg, sep = ""), list())
  .GlobalEnv[[name2]] <-lapply(mylist2, function(z){z[order(z$padj),]})
  
 .GlobalEnv[[name]][[a+1]] <- .GlobalEnv[[name2]]
  names( .GlobalEnv[[name]])[[a+1]] <- paste("res", sub("dds", "\\1", g), sep="")
 
  #####################
  #Add gene Annotation#
  #####################
  for (i in names(.GlobalEnv[[name]])[grepl("res", names(.GlobalEnv[[name]]))]) {
    for (ii in names(.GlobalEnv[[name]][[i]])) {
      #print(head(.GlobalEnv[[name]][[i]][[ii]],1))
      .GlobalEnv[[name]][[i]][[ii]] <- dplyr::left_join(.GlobalEnv[[name]][[i]][[ii]], SLUCGeneManual)
      .GlobalEnv[[name]][[i]][[ii]]$X.gene_id <- .GlobalEnv[[name]][[i]][[ii]]$rowname
      .GlobalEnv[[name]][[i]][[ii]]$rowname <- .GlobalEnv[[name]][[i]][[ii]]$Human_SYMBOL_Manual
      rownames(.GlobalEnv[[name]][[i]][[ii]]) <- make.unique(.GlobalEnv[[name]][[i]][[ii]]$rowname, sep = "_")
      #print(head(.GlobalEnv[[name]][[i]][[ii]],1))
      }
    }

  ###########
  #Save Data#
  ###########
  saveRDS(.GlobalEnv[[name]], file = paste0(file.path(path_Output_RNA,  paste(paste(paste(save_name, "DEG_Replicates_Outlier_pairwise", sep="_"), paste("LFC", LFCThreshold, sep=""), Date, sep="_"), ".rds", sep=""))))
  }
}
## [1] "LFCThreshold 1"
## [1] "LFCThreshold 0.05"
  ###########
  #Read Data#
  ###########
#Load the data with LFC Threshold 0.05
LFCThreshold <- 0.05
name <-paste0(Species, sep = "")
assign(paste0(Species, sep = ""), list()) 
.GlobalEnv[[name]]<-readRDS(file = paste0(file.path(path_Output_RNA,  paste(paste(paste(save_name, "DEG_Replicates_Outlier_pairwise", sep="_"), paste("LFC", "0.05", sep=""), Date, sep="_"), ".rds", sep=""))))

3.2 PCA

3.2.1 Overview PCA

PCA plot of vst-tranformed counts followed by an overview PCA analysis to allow outlier identification and drivers in gene expression

##################
# SampleDist PCAs#
##################
for (i in names(.GlobalEnv[[name]])[grepl("vst", names(.GlobalEnv[[name]]))]){
  require(plyr)
  require(ggrepel) 
  require(cowplot)
  if (OperatingSystem == "Mac" ) {
      quartz() }
  tryCatch({
  g       <- paste(i) 
  gg          <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", i)
  A<-pcaplotRK3(.GlobalEnv[[name]][[i]],intgroup = c("Replicates"), pcX = 1, pcY = 2, title="",ellipse = TRUE,     ellipse.prob = 0.5) +
  scale_fill_manual(values=col.Palette$SL) + #col.Palette.SeqCenter #col.Palette.Cruises
  scale_color_manual(values=col.Palette$SL) + atheme +
  theme_minimal() + awhite + theme(axis.title.x = element_blank()) +
  theme(
        panel.grid.major = element_line(colour = "grey50"), 
        panel.grid.minor = element_line(colour = "grey50"))
  prow <- cowplot::plot_grid(A, labels = c(""), ncol = 1)
  title <- ggdraw() + draw_label_themeRKwhite(paste(Species,  gg, Type), element = "plot.title",x = 0.05, hjust = 0,    vjust = 1)
  subtitle <- ggdraw() + draw_label_themeRKwhite(paste("vst-PCA", "with",       ... = 
  length(rownames(.GlobalEnv[[name]][[i]])),"genes",sep = " "), element = "plot.subtitle",x = 0.05, hjust = 0,   
  vjust = 1)
  A<- cowplot::plot_grid(title, subtitle, prow, ncol = 1, rel_heights = c(0, 0.05, 0.989))
  ggsave(A, filename = paste(paste(save_name, Type, gg, "PCA", sep="_") ,".png", sep=""), path = pathPlots , device='png', dpi=300, width = 7,
  height = 7)
  plot(A)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."

3.2.2 PCA Overview Analysis

Getting an overview of drivers in gene expression

##########
#PCAtools#
##########  
#All Code Adapted from: 
#https://github.com/kevinblighe/PCAtools 
#https://bioconductor.org/packages/devel/bioc/vignettes/PCAtools/inst/doc/PCAtools.html

for (i in names(.GlobalEnv[[name]])[grepl("vst", names(.GlobalEnv[[name]]))]){
  require(plyr)
  require(dplyr)
  require(ggrepel) 
  require(cowplot)
  require(PCAtools)

  tryCatch({
  g       <- paste(i) 
  gg      <- sub('...', '', g)  

  metadata <- colData(.GlobalEnv[[name]][[i]])
  metadata <- metadata[c("Replicates", "Species", "SampleID",  "Loc", "Salinity", "O2", "Temperature", "SecchiDepth", "HSI",  "FultonK", "Weight", "Length", "SSI")]

  #We join in the the SLUCGeneManual-Annotations but have to make them unique as they could otherwise not become rownames which is   necessary for the PCAplot
  
  vstData <- as.data.frame(assay(.GlobalEnv[[name]][[i]]))
  vstData$rowname <- rownames(vstData)
  vstData  <-dplyr::left_join(vstData, SLUCGeneManual)
  rownames(vstData) <- make.unique(vstData$Human_SYMBOL_Manua, sep = "_")
  vstData <- vstData[colnames(vstData) %in% rownames(metadata)]
  
 p <- PCAtools::pca(vstData, metadata = metadata, removeVar = 0.1)
 
  screeplot(p, axisLabSize = 18, titleLabSize = 22)
 P <-  biplot(p,
              x = "PC1", y = "PC2",
      showLoadings = TRUE,
      ntopLoadings = 10, 
      boxedLoadingsNames = TRUE,
      fillBoxedLoadings = "white", 
      lengthLoadingsArrowsFactor = 1,
      sizeLoadingsNames = 4,
      colLoadingsNames = 'red4',
      colLoadingsArrows = "red4",
      #boxedLabels = T,  
      #other parameters
      lab = NULL,
      colby = "Replicates", colkey = col.Palette$SL,
      hline = 0, vline = c(-25, 0, 25),
      vlineType = c('dotdash', 'solid', 'dashed'),
      gridlines.major = FALSE, gridlines.minor = FALSE,
      pointSize = 6,
      legendPosition = 'left', legendLabSize = 14, legendIconSize = 8.0,
     # shape = 'Grade', shapekey = c('Grade 1'=15, 'Grade 2'=17, 'Grade 3'=8),
      drawConnectors = FALSE,
      title = paste('PCA bi-plot', Species,  gg, Type, sep=" "),
      subtitle = 'PC1 versus PC2') + # caption = '27 PCs ≈ 80%' +
      theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
      theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank())
      prow <- cowplot::plot_grid(P, labels = c(""), ncol = 1)
      A <- cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
      plot(A)
      ggsave(A, filename = paste(paste(save_name, Type, gg, "Gene-BiploPCA", sep="_") ,".png", sep=""), path = pathPlots , 
             device='png', dpi=300, width = 10,height = 6)

      
    ###################      
    #PairsPlot of PCAs#
    ###################
    PP<- pairsplotRK(p,
    components = getComponents(p, c(1:10)),
    triangle = TRUE, trianglelabSize = 12,
    hline = 0, vline = 0,
    pointSize = 0.4,
    gridlines.major = FALSE, gridlines.minor = FALSE,
    colby = "Replicates", colkey = col.Palette$SL, plotaxes = FALSE, #title = 'Pairs plot'
    margingaps = unit(c(-0.01, -0.01, -0.01, -0.01), 'cm')) +
    theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
    theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank()) +
    theme(text = element_text(colour = OutlineColor))
    prow <- cowplot::plot_grid(PP, labels = c(""), ncol = 1)
    AA<- cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
    ggsave(AA, bg='transparent', filename = paste(paste(save_name, Type, gg, "Gene-PairsPlorPCA", sep="_") ,".png", sep=""), path = pathPlots , 
             device='png', dpi=300, width = 10,height = 10)
    plot(AA)
    
    
   
    ######################     
    #PlotLoadings of PCAs#
    ######################    
    PL <- plotloadingsRK(p,
    rangeRetain = 0.05,
    labSize = 4.0,
    #title = 'Loadings plot',
    #subtitle = 'PC1, PC2, PC3, PC4, PC5',
    caption = 'Top 5% variables',
    shape = 21,
    col = c('blue', 'pink'),
    drawConnectors = TRUE) +
    theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
    theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank()) +
    theme(text = element_text(colour = OutlineColor)) 
    prow <- cowplot::plot_grid(PL, labels = c(""), ncol = 1)
    AA<- cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
    ggsave(AA, bg='transparent', filename = paste(paste(save_name, Type, gg, "Gene-PlotLoadingsPCA", sep="_") ,".png", sep=""), path = pathPlots , 
             device='png', dpi=300, width = 10,height = 5)
    plot(PL)
    
    

    ##################
    #Eigencor of PCAs#
    ################## 
    metavarsSL <- c("Salinity", "O2","Temperature", "SecchiDepth", "HSI",  "FultonK", "Weight","Length","SSI")
    EC<- eigencorplot(p,
    components = getComponents(p, 1:10),
    metavars = metavarsSL,
    col =c('darkgreen', "forestgreen",'cornsilk1',"gold", "goldenrod1"),
    #col = c("blue", 'lightblue', 'grey90', 'pink', 'darkred'),
    cexCorval = 0.7,
    colCorval = 'black',
    colTitleX = OutlineColor,
    colTitleY = OutlineColor,
    colLabX = OutlineColor,
    colLabY = OutlineColor,
    colMain = OutlineColor,
    fontCorval = 2,
    posLab = 'bottomleft',
    rotLabX = 45,
    #posColKey = 'top',
    cexLabColKey = 1.5,
    scale = TRUE,
    #main = 'PC1-10 correlations',
    colFrame = OutlineColor,
    plotRsquared = FALSE, 
    corMultipleTestCorrection = 'BH')
    #plotRsquared = TRUE,
    #corFUN = 'pearson',
    #corUSE = 'pairwise.complete.obs',
    # theme_minimal() + awhite + theme(axis.title.x = element_blank()) +
    # theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank()) +
    # theme(text = element_text(colour = "white"))
    prow <- cowplot::plot_grid(EC, labels = c(""), ncol = 1)
    AA<- cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
    ggsave(AA, bg='transparent', filename = paste(paste(save_name, Type, gg, "Gene-EigencorplotPCA", sep="_") ,".png", sep=""), path = pathPlots , 
             device='png', dpi=300, width = 10,height = 5)
    plot(AA)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "Adapted from https://github.com/kevinblighe/PCAtools \n#https://bioconductor.org/packages/devel/bioc/vignettes/PCAtools/inst/doc/PCAtools.html"

## [1] "Adapted from https://github.com/kevinblighe/PCAtools \n#https://bioconductor.org/packages/devel/bioc/vignettes/PCAtools/inst/doc/PCAtools.html"

#-

4 GSEA

4.1 KEGG & WIKI Pairwise Comparisons

Pathway analysis Kegg and Wiki for spatial comparisons

#########################################################
#For Reproducibility a Kegg.db was created on 26.06.2023# add use_internal_data=T to gseKEGG
#########################################################
#https://github.com/YuLab-SMU/clusterProfiler/issues/561
#We need to downlaod and create a newest KEGG.db prior use and than tell ClusterProfiler to use
#internal data: use_internal_data=T
#Update the clusterprofilier to the latest Github version ( the lastest version is 4.7.1.3)
#install.packages("remotes")
#remotes::install_github("YuLab-SMU/clusterProfiler")
#Establish a local KEGG database
# install the packages
# remotes::install_github("YuLab-SMU/createKEGGdb")
# # import the library and create a KEGG database locally
# library(createKEGGdb)
# species <-c("ath","hsa","mmu", "rno","dre","dme","cel")
# createKEGGdb::create_kegg_db(species)
# # You will get KEGG.db_1.0.tar.gz file in your working directory
# #install the KEGG.db and import it
# install.packages("KEGG.db_26.06.2023.tar.gz", repos=NULL,type="source")
# library(KEGG.db)
#add use_internal_data=T in your enrichKEGG function


######################
#GSEA with TPM Filter#
######################
#############
#KEGG & WIKI#
#############

#might take some minutes
  for (i in names(.GlobalEnv[[name]])[grepl("dds", names(.GlobalEnv[[name]]))]) {
  #https://github.com/kevinblighe/E-MTAB-6141
  tryCatch({  
  a           <- length(.GlobalEnv[[name]])
  g           <- paste(i)
  gg          <- sub('...', '', g)
  if (gg == "Gill" ) {
  SAMDF <- SAMDF_RNA_Gill } else {SAMDF <- SAMDF_RNA_Liver}
  Samples <-  if (gg == "Gill" ) {
      Samples <- Samples_RNA_Gill }else {
      Samples <- Samples_RNA_Liver }
  
  name2       <- .GlobalEnv[[name]][names(.GlobalEnv[[name]])[grepl(paste(gg), names(.GlobalEnv[[name]]))]]
  tpm         <- name2[[names(name2)[grepl("tpm", names(name2))]]]
  vst         <- assay(name2[[names(name2)[grepl("vst", names(name2))]]])
  res         <- name2[[names(name2)[grepl("res", names(name2))]]]

  Comparisons <- name2[[names(name2)[grepl("res", names(name2))]]]
  names(Comparisons) <-paste(gg, names(Comparisons), sep="-")
  
  FILENAME    <- paste("Replicates")
  TITLE       <- paste("Replicates")
  A           <- GeneGSEAPlotKEGGandWikiRKwhiteSLUC_Kegg_Online(tpm = tpm , vst = vst, Species = Species, tpm_value = 1, Samples = Samples, 
                               SAMDF = SAMDF, TITLE = TITLE, filename= FILENAME, Comparisons = Comparisons)}, 
  error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
## [1] "Liver-SLSU21MG-SLSU21BB"
## [1] "Liver-SLSU21MG-SLSU21SS"
## [1] "Liver-SLSU21MG-SLSU21ML"
## [1] "Liver-SLSU21BB-SLSU21SS"
## [1] "Liver-SLSU21BB-SLSU21ML"
## [1] "Liver-SLSU21SS-SLSU21ML"
## [1] "Comparisons with significant gene set enrichment"
## [1] "Liver-SLSU21MG-SLSU21SS" "Liver-SLSU21SS-SLSU21ML"

## [1] "WIKI Pathway Analysis"
## [1] "Comparisons with significant gene set enrichment"
## [1] "Liver-SLSU21MG-SLSU21SS" "Liver-SLSU21SS-SLSU21ML"

## [1] "Combined KEGG and WIKI gene set enrichment"

## [1] "Gill-SLSU21MG-SLSU21BB"
## [1] "Gill-SLSU21MG-SLSU21SS"
## [1] "Gill-SLSU21MG-SLSU21ML"
## [1] "Gill-SLSU21BB-SLSU21SS"
## [1] "Gill-SLSU21BB-SLSU21ML"
## [1] "Gill-SLSU21SS-SLSU21ML"
## [1] "Comparisons with significant gene set enrichment"
## [1] "Gill-SLSU21MG-SLSU21BB" "Gill-SLSU21MG-SLSU21ML" "Gill-SLSU21BB-SLSU21ML"
## [4] "Gill-SLSU21SS-SLSU21ML"

## [1] "WIKI Pathway Analysis"
## [1] "Comparisons with significant gene set enrichment"
## [1] "Gill-SLSU21MG-SLSU21BB" "Gill-SLSU21MG-SLSU21ML" "Gill-SLSU21SS-SLSU21ML"

## [1] "Combined KEGG and WIKI gene set enrichment"

#Remove Empty list elements
.GlobalEnv[[name]]<- .GlobalEnv[[name]][lapply(.GlobalEnv[[name]],length)>0]
names(.GlobalEnv[[name]])
##  [1] "vstLiver"             "vstGill"              "ddsLiver"            
##  [4] "ddsGill"              "tpmLiver"             "tpmGill"             
##  [7] "resLiver"             "resGill"              "gseKEGG.Liver"       
## [10] "gseWIKI.Liver"        "gseKEGGandWIKI.Liver" "gseKEGG.Gill"        
## [13] "gseWIKI.Gill"         "gseKEGGandWIKI.Gill"
###########
#Save Data#
###########
saveRDS(.GlobalEnv[[name]], file = paste0(file.path(path_Output_RNA,  paste(paste(paste(save_name, "DEG-Replicates-Outlier-GSEA-KEGGandWiki-Online", sep="_"), paste("LFC", LFCThreshold, sep=""), Date, sep="_"), ".rds", sep=""))))

###########
#Read Data#
###########
LFCThreshold <- 0.05
name <-paste0(Species, sep = "")
assign(paste0(Species, sep = ""), list()) 
.GlobalEnv[[name]]<-readRDS(.GlobalEnv[[name]], file = paste0(file.path(path_Output_RNA,  paste(paste(paste(save_name, "DEG-Replicates-Outlier-GSEA-KEGGandWiki-Online", sep="_"), paste("LFC", LFCThreshold, sep=""), Date, sep="_"), ".rds", sep=""))))

#Checkup of differential genes and Pathways identified: 
#Diferentially expressed Gill-Genes
print(stack(sapply(.GlobalEnv[[name]]$resGill,dim)[1,]))
##   values               ind
## 1    716 SLSU21MG-SLSU21BB
## 2    921 SLSU21MG-SLSU21SS
## 3   3392 SLSU21MG-SLSU21ML
## 4     56 SLSU21BB-SLSU21SS
## 5    880 SLSU21BB-SLSU21ML
## 6   1231 SLSU21SS-SLSU21ML
#Significant KEGG Pathways
print(stack(sapply(.GlobalEnv[[name]]$gseKEGG.Gill,dim)[1,]))
##   values                    ind
## 1     19 Gill-SLSU21MG-SLSU21BB
## 2     35 Gill-SLSU21MG-SLSU21ML
## 3      1 Gill-SLSU21BB-SLSU21ML
## 4     19 Gill-SLSU21SS-SLSU21ML
#Diferentially expressed Liver-Genes
print(stack(sapply(.GlobalEnv[[name]]$resLiver,dim)[1,]))
##   values               ind
## 1    125 SLSU21MG-SLSU21BB
## 2    935 SLSU21MG-SLSU21SS
## 3    390 SLSU21MG-SLSU21ML
## 4    272 SLSU21BB-SLSU21SS
## 5    195 SLSU21BB-SLSU21ML
## 6   1958 SLSU21SS-SLSU21ML
#Significant KEGG Pathways
print(stack(sapply(.GlobalEnv[[name]]$gseKEGG.Liver,dim)[1,]))
##   values                     ind
## 1      1 Liver-SLSU21MG-SLSU21SS
## 2     37 Liver-SLSU21SS-SLSU21ML

4.2 Hierachize KEGG Pathways

Download the KEGG Parent Pathway Map here: https://www.genome.jp/kegg-bin/get_htext?br08901.keg Order all Differentially Abundant Pathways to the Parent Categories Get all genes for relevant Pathways Cluster-Heatmap those -> Heatplot under GSEA KEGG&WIKI

4.2.1 Creation of KeggPathwayMaps

#############################
#Creation of KeggPathwayMaps#
#############################
# library(readxl)
# KeggPathwayMaps<- as.data.frame(read_excel(file.path(path_Output_RNA, "KeggPathwayMaps-08.05.2023-RK.xlsx")))[,-1]
# KeggPathwayMaps$PathwayID<- formatC(KeggPathwayMaps$hsa, width = 5, format = "d", flag = "0")
# KeggPathwayMaps$PathwayID<- paste("hsa", KeggPathwayMaps$PathwayID, sep="")
#
# KeggPathwayDescription<- as.data.frame(read_excel(file.path(path_Output_RNA, "KeggPathwayDescription-08.05.2023.xlsx")))
#
# KeggPathwayMaps <- dplyr::left_join(KeggPathwayMaps, KeggPathwayDescription)
# KeggPathwayMaps[c("PathwayID", "Description")]
#
# require(limma)
# tab <- getGeneKEGGLinks(species="hsa")
# tab$Symbol <- mapIds(org.Hs.eg.db, tab$GeneID,
#                        column="SYMBOL", keytype="ENTREZID")
# library(dplyr)
# A <- tab %>%group_by(PathwayID) %>%
#   summarise(Symbol = toString(Symbol)) %>%
#   ungroup()
# B <- tab %>%group_by(PathwayID) %>%
#   summarise(GeneID = toString(GeneID)) %>%
#   ungroup()
# BB<- dplyr::left_join(A,B)
# A <- merge(KeggPathwayMaps, BB,  all.x=TRUE, by="PathwayID")
# A$Symbol<- gsub("\\, ", "/", A$Symbol)
# A$GeneID<- gsub("\\, ", "/", A$GeneID)
# saveRDS(A, file = paste0(file.path(path_Output_RNA, "KeggPathwayMapsGenes08.05.2023"),".rds"))

# head(tab)
# tabOfInterest <- tab[tab$PathwayID == "hsa01100",]
# genes_of_interest <-  as.vector(tabOfInterest$Symbol)
#write.csv2(KeggPathwayMapsGenes, file.path(path_Output_RNA, "KeggPathwayMapsGenes09.05.23.csv"))

#KeggPathwayMapsGenes <- readRDS(file = paste0(file.path(path_Output_RNA_Annotation, "KeggPathwayMapsGenes08.05.2023"), ".rds"))

4.2.2 Tissue Specific KEGG output

######
#KEGG#
######
KeggPathwayMapsGenes <- readRDS(file = paste0(file.path(path_Input_RNA, "KeggPathwayMapsGenes08.05.2023"), ".rds"))
KeggPathwayMapsGenes$ID <- KeggPathwayMapsGenes$PathwayID 
KeggPathwayMapsGenes$PathwayGeneID <- KeggPathwayMapsGenes$GeneID
KeggPathwayMapsGenes$PathwaySymbol <- KeggPathwayMapsGenes$Symbol

####################
#Tissues Individual#
####################
for (i in names(.GlobalEnv[[name]])[grepl("gseKEGG.Liver|gseKEGG.Gill", names(.GlobalEnv[[name]]))]) {
  #https://github.com/kevinblighe/E-MTAB-6141
  tryCatch({  
  a           <- length(.GlobalEnv[[name]])
  g           <- paste(i)
  gg          <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", i)
  
  if (gg == "Gill" ) {
  SAMDF <- SAMDF_RNA_Gill } else {SAMDF <- SAMDF_RNA_Liver}
  Samples <-  if (gg == "Gill" ) {
      Samples <- Samples_RNA_Gill }else {
      Samples <- Samples_RNA_Liver }
  
  A <- .GlobalEnv[[name]][[i]]

  #Add the KeggPathwayMaps
  KeggPathwayMapsGenes$ID <- KeggPathwayMapsGenes$PathwayID 
  for (ii in names(A)) {
  A[[ii]] <- dplyr::left_join(A[[ii]], KeggPathwayMapsGenes[KeggPathwayMapsGenes$ID %in% A[[ii]]$ID,])}
  B <- do.call(rbind.data.frame, A)
  B <- B[!is.na(B$Parent),]

  B$Comparison <- gsub("\\..*","",rownames(B))
  B$Comparison <- gsub("(Gill|Liver)-*","",B$Comparison)
  rownames(B) <- NULL
  
  
  trial <- as.data.frame(B %>% 
    dplyr::group_by(Comparison, Child) %>% dplyr::summarise(GeneCount = length(unique(as.vector(unique(unlist(str_split(GeneID, "/"))))))))
  trial2 <- as.data.frame(B %>% 
    dplyr::group_by(Child) %>% 
    dplyr::summarise(ChildGeneCount = length(unique(as.vector(unique(unlist(str_split(GeneID, "/"))))))))
  trial3 <- dplyr::left_join(trial, trial2)
  trial3$ModuleRatio <- trial3$GeneCount / trial3$ChildGeneCount * 100
  
  B <- dplyr::left_join(B, trial3)
  BB <- B
  BB <- BB[c("Comparison", "NES", "Description", "GeneRatio", "p.adjust", "sign", "Child", "Parent", "ModuleRatio")]

  #Selection of Comparisons
  VariableOrder<-VariableOrder
  CompOrder <- as.data.frame(t(combn(SAMDF %>% 
  arrange(factor(Replicates, levels = VariableOrder)) %>% 
  pull(paste(variable)) %>% 
  unique(),2)))
  CompOrder$V3<-Reduce(function(...) paste(..., sep = "-"), CompOrder )
  CompOrder$V4 <- gsub("SLSU", "", CompOrder$V3)
  
  BB$fComparison <- factor(BB$Comparison, levels=CompOrder$V3[CompOrder$V3 %in% BB$Comparison])
  BB$fComp <- gsub("SLSU", "", BB$fComparison)
  BB$fComp <- factor(BB$fComp, levels=CompOrder$V4[CompOrder$V4 %in% BB$fComp])
  

  Order <- c("activated", "suppressed")
  mg <- ggplot(BB, aes(x = fComp, y = Child, color=ModuleRatio, size = ModuleRatio)) + geom_point() + scale_size(range = c(1, 15)) +
scale_color_gradientn(colours = c("blue", "pink", "red", "red"), 
                        values = scales::rescale(x = c(0, 10, 50, 100), to = c(0,1), from = c(0, 50) ))
mg <- mg + facet_grid(Parent ~ factor(sign, levels=Order), scales = "free", space = "free") + 
        #theme_minimal() + 
        atheme + 
        theme(strip.text.y = element_text(angle = 0))  +
        theme(#axis.text.x=element_blank(),
        #axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y.left = element_blank(),
        axis.ticks.y =element_blank() 
        #axis.title.x = element_blank()
        ) +
  theme(
    panel.background = element_rect(fill='transparent'), #transparent panel bg
    plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
    #panel.grid.major = element_blank(), #remove major gridlines
    #panel.grid.minor = element_blank(), #remove minor gridlines
    legend.background = element_rect(fill='transparent'), #transparent legend bg
    legend.box.background = element_rect(fill='transparent'), #transparent legend panel
    panel.grid.major = element_line(colour = "grey40"), 
    panel.grid.minor = element_line(colour = "grey40") 
  ) +
  theme(axis.title.x.bottom =  element_text(color="white"), 
        strip.text = element_text(color = "black", face= "bold")) +
  ggtitle(paste(gg, "Enriched Pathways", sep=" "))

g <- ggplot_gtable(ggplot_build(mg))
        stripr <- which(grepl('strip-t', g$layout$name))
        fills <- alpha(c("red", "blue"), 0.4)
        k <- 1
        for (x in stripr) {
        j <- which(grepl('rect', g$grobs[[x]]$grobs[[1]]$childrenOrder))
        g$grobs[[x]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
        k <- k+1}
        
        prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
        AA<- print(cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.1, 0.99)))
ggsave(AA, filename = paste(paste(save_name, Type, paste("Slim_GSEA_KEGGPathway", gg, sep="_"), sep="_") ,".png", sep=""), path = pathPlots , device='png', dpi=300, width = 10,height = 12)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}

4.2.3 Combined Tissue KEGG output

#######################
#Gill & Liver Combined#
#######################

Slims <- list()
for (i in names(.GlobalEnv[[name]])[grepl("gseKEGG.Gill|gseKEGG.Liver", names(.GlobalEnv[[name]]))]) {
  #https://github.com/kevinblighe/E-MTAB-6141
  tryCatch({  
  a           <- length(.GlobalEnv[[name]])
  g           <- paste(i)
  gg          <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", i)
  
  if (gg == "Gill" ) {
  SAMDF <- SAMDF_RNA_Gill } else {SAMDF <- SAMDF_RNA_Liver}
  Samples <-  if (gg == "Gill" ) {
      Samples <- Samples_RNA_Gill }else {
      Samples <- Samples_RNA_Liver }
  
  A<- .GlobalEnv[[name]][[i]]

  #Add the KeggPathwayMaps
  KeggPathwayMapsGenes$ID <- KeggPathwayMapsGenes$PathwayID 
  for (ii in names(A)) {
  A[[ii]] <- dplyr::left_join(A[[ii]], KeggPathwayMapsGenes[KeggPathwayMapsGenes$ID %in% A[[ii]]$ID,])}
  B <- do.call(rbind.data.frame, A)
  B <- B[!is.na(B$Parent),]

  B$Comparison <- gsub("\\..*","",rownames(B))
  B$Comparison <- gsub("(Gill|Liver)-*","",B$Comparison)
  rownames(B) <- NULL

  trial <- as.data.frame(B %>% 
    dplyr::group_by(Comparison, Child) %>% dplyr::summarise(GeneCount = length(unique(as.vector(unique(unlist(str_split(GeneID, "/"))))))))
  trial2 <- as.data.frame(B %>% 
    dplyr::group_by(Child) %>% 
    dplyr::summarise(ChildGeneCount = length(unique(as.vector(unique(unlist(str_split(GeneID, "/"))))))))
  trial3 <- dplyr::left_join(trial, trial2)
  trial3$ModuleRatio <- trial3$GeneCount / trial3$ChildGeneCount * 100
  
  B <- dplyr::left_join(B, trial3)
  BB <- B
  BB <- BB[c("Comparison", "NES", "Description", "GeneRatio", "p.adjust", "sign", "Child", "Parent", "ModuleRatio")]
  
  #Selection of Comparisons
  VariableOrder<-VariableOrder
  CompOrder <- as.data.frame(t(combn(SAMDF %>% 
  arrange(factor(Replicates, levels = VariableOrder)) %>% 
  pull(paste(variable)) %>% 
  unique(),2)))
  CompOrder$V3<-Reduce(function(...) paste(..., sep = "-"), CompOrder )
  CompOrder$V4 <- gsub("SLSU", "", CompOrder$V3)
  
  BB$fComparison <- factor(BB$Comparison, levels=CompOrder$V3[CompOrder$V3 %in% BB$Comparison])
  BB$fComp <- gsub("SLSU", "", BB$fComparison)
  BB$fComp <- factor(BB$fComp, levels=CompOrder$V4[CompOrder$V4 %in% BB$fComp])
  BB$Tissue <- paste(gg)
  
  b <- length(Slims)
  Slims[[b+1]] <- BB
  names(Slims)[[b+1]] <- gg  
  
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})} 
  
  
Slim <- do.call(rbind.data.frame, Slims)
rownames(Slim) <- c()
Slim$fComp <- factor(Slim$fComp, levels=CompOrder$V4[CompOrder$V4 %in% Slim$fComp])


Order <- c("Gill", "Liver")
mg <- ggplot(Slim, aes(x = fComp, y = Child, color=sign, size = ModuleRatio)) + geom_point() + scale_size(range = c(1, 15)) 
#scale_color_manual(colours = c("blue",  "red")) 
#scale_colour_manual(values=c("blue",  "red"))
mg <- mg + facet_grid(Parent ~ factor(Tissue, levels=Order), scales = "free", space = "free") + 
        #theme_minimal() + 
        atheme + 
        theme(strip.text.y = element_text(angle = 0))  +
        theme(#axis.text.x=element_blank(),
        #axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y.left = element_blank(),
        axis.ticks.y =element_blank() 
        #axis.title.x = element_blank()
        ) +
  theme(
    panel.background = element_rect(fill='transparent'), #transparent panel bg
    plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
    #panel.grid.major = element_blank(), #remove major gridlines
    #panel.grid.minor = element_blank(), #remove minor gridlines
    legend.background = element_rect(fill='transparent'), #transparent legend bg
    legend.box.background = element_rect(fill='transparent'), #transparent legend panel
    panel.grid.major = element_line(colour = "grey40"), 
    panel.grid.minor = element_line(colour = "grey40") 
  ) +
  theme(axis.title.x.bottom =  element_text(color="white"), 
        strip.text = element_text(color = "black", face= "bold"))

g <- ggplot_gtable(ggplot_build(mg))
        stripr <- which(grepl('strip-t', g$layout$name))
        fills <- alpha(c("salmon", "darkred"), 0.4)
        k <- 1
        for (x in stripr) {
        j <- which(grepl('rect', g$grobs[[x]]$grobs[[1]]$childrenOrder))
        g$grobs[[x]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
        k <- k+1}
        prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
        AA<- print(cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.1, 0.99)))

ggsave(AA, filename = paste(paste(save_name, Type, paste("Slim_Tissues_GSEA_KEGGPathway", gg, sep="_"), sep="_") ,".png", sep=""), path = pathPlots ,device='png', dpi=300, width = 10,height = 12)

4.2.4 KEGG Specific Child Terms

######################
#Specific Child Terms#
######################

Slim <- do.call(rbind.data.frame, Slims)
rownames(Slim) <- c()
Slim$fComp <- factor(Slim$fComp, levels=CompOrder$V4[CompOrder$V4 %in% Slim$fComp])

SlimSpecific <- Slim[Slim$Parent %in% c("Carbon metabolism", "Metabolic pathways"), ]

Order <- c("Gill", "Liver")
mg <- ggplot(SlimSpecific,aes(x = fComp, y = Description, color=sign, size = ModuleRatio)) + geom_point() + scale_size(range = c(1, 15))
#scale_color_manual(colours = c("blue",  "red")) 
#scale_colour_manual(values=c("blue",  "red"))
mg <- mg + facet_grid(Child ~ factor(Tissue, levels=Order), scales = "free", space = "free") + 
        #theme_minimal() + 
        atheme + 
        theme(strip.text.y = element_text(angle = 0))  +
        theme(#axis.text.x=element_blank(),
        #axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y.left = element_blank(),
        axis.ticks.y =element_blank() 
        #axis.title.x = element_blank()
        ) +
  theme(
    panel.background = element_rect(fill='transparent'), #transparent panel bg
    plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
    #panel.grid.major = element_blank(), #remove major gridlines
    #panel.grid.minor = element_blank(), #remove minor gridlines
    legend.background = element_rect(fill='transparent'), #transparent legend bg
    legend.box.background = element_rect(fill='transparent'), #transparent legend panel
    panel.grid.major = element_line(colour = "grey40"), 
    panel.grid.minor = element_line(colour = "grey40") 
  ) +
  theme(axis.title.x.bottom =  element_text(color="white"), 
        strip.text = element_text(color = "black", face= "bold"))

g <- ggplot_gtable(ggplot_build(mg))
        stripr <- which(grepl('strip-t', g$layout$name))
        fills <- alpha(c("salmon", "darkred"), 0.4)
        k <- 1
        for (x in stripr) {
        j <- which(grepl('rect', g$grobs[[x]]$grobs[[1]]$childrenOrder))
        g$grobs[[x]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
        k <- k+1}
        prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
        AA<- print(cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.1, 0.99)))

ggsave(AA, filename = paste(paste(save_name, Type, paste("Slim_Metabolism_GSEA_KEGGPathway", gg, sep="_"), sep="_") ,".png", sep=""), path = pathPlots , device='png', dpi=300, width = 10,height = 12)

#-

5 Aggregate DESeq2 Heatmap

DeseqLFC005 <- readRDS(file = paste0(file.path(path_Output_RNA,  paste(paste(paste(save_name, "DEG_Replicates_Outlier_pairwise", sep="_"), paste("LFC", "0.05", sep=""), Date, sep="_"), ".rds", sep=""))))
DeseqLFC1 <- readRDS(file = paste0(file.path(path_Output_RNA,  paste(paste(paste(save_name, "DEG_Replicates_Outlier_pairwise", sep="_"), paste("LFC", "1", sep=""), Date, sep="_"), ".rds", sep=""))))

##########
#Heatmap #
##########
tpm_value <- 1; print(paste("TPM value =", tpm_value))
## [1] "TPM value = 1"
columnGillClusterNumber  <- 1
rowGillClusterNumber     <- 4
columnLiverClusterNumber <- 1
rowLiverClusterNumber    <- 4
WidthValue  <- 10
HeightValue <- 10

hmap_list <- list()
for (resSet in names(DeseqLFC005)[grepl("dds", names(DeseqLFC005))]) {
  #https://github.com/kevinblighe/E-MTAB-6141
  tryCatch({
    
    hmap_list_length <- length(hmap_list)
    Tissue      <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", resSet); print(resSet)
    Samples <-  if (Tissue == "Gill" ) {
      Samples <- Samples_RNA_Gill }else {
      Samples <- Samples_RNA_Liver }
    
    #Selection of  Sample file
    SAMDF <- if (Tissue == "Gill" ) {
    SAMDF <- SAMDF_RNA_Gill } else {SAMDF <- SAMDF_RNA_Liver}
    
    TPM  <- DeseqLFC005[[paste("tpm", Tissue, sep="")]]
    RES  <- DeseqLFC005[[paste("res", Tissue, sep="")]]
    Core <- DeseqLFC1[[paste("res", Tissue, sep="")]]

    resgenelist   <- list()
      for (PairwiseComp in names(RES)) {
          RES[[PairwiseComp ]]$Human_SYMBOL_Manual_unique <- 
            make.unique(RES[[PairwiseComp ]]$rowname, sep = "_")
          genes1 <- RES[[PairwiseComp]]$Human_SYMBOL_Manual_unique
          genes2 <- RES[[PairwiseComp]]$Geneid
          genes3 <- RES[[PairwiseComp]]$ENTREZID_Manual
          resgenelist[[PairwiseComp]]$Human_SYMBOL_Manual_unique <- genes1
          resgenelist[[PairwiseComp]]$Geneid <- genes2
          resgenelist[[PairwiseComp]]$ENTREZID <- genes3
          }
          dedup_ids <- do.call(rbind.data.frame, resgenelist)
          dedup_ids <- dedup_ids[!duplicated(dedup_ids$Geneid),]
          
     resgenelistCore   <- list()
      for (PairwiseComp in names(Core)) {
          Core[[PairwiseComp ]]$Human_SYMBOL_Manual_unique <- 
            make.unique(Core[[PairwiseComp ]]$rowname, sep = "_")
          genes1 <- Core[[PairwiseComp]]$Human_SYMBOL_Manual_unique
          genes2 <- Core[[PairwiseComp]]$Geneid
          genes3 <- Core[[PairwiseComp]]$ENTREZID_Manual
          resgenelistCore[[PairwiseComp]]$Human_SYMBOL_Manual_unique <- genes1
          resgenelistCore[[PairwiseComp]]$Geneid <- genes2
          resgenelistCore[[PairwiseComp]]$ENTREZID <- genes3
          }
          dedup_idsCore <- do.call(rbind.data.frame, resgenelistCore)
          dedup_idsCore <- dedup_idsCore[!duplicated(dedup_idsCore$Geneid), ]
          Core <- dedup_idsCore$Human_SYMBOL_Manual_unique
          length(Core)
          #Replace long names in the Core variable#
          Core[which(nchar(Core) > 27)] <- substr(Core[which(nchar(Core) > 27)], 1, 27)
          
          TPM <- TPM[names(TPM) %in% SAMDF$SAMPLE]
          TPM$Geneid <- rownames(TPM)
          TPM  <-dplyr::left_join(TPM, SLUCGeneManual[c("Geneid", "Human_SYMBOL_Manual", "ENTREZID_Manual")])
          TPM$Human_SYMBOL_Manual_unique <- make.unique(TPM$Human_SYMBOL_Manual, sep = "_") 

          FILENAME2   <- paste(paste(save_name, "TPM_Heatmap", Tissue, Type, "noClust", sep="_"), ".png", sep="")
          
          TITLE       <- draw_label_themeRKwhite(
                        paste(Species, Tissue, Type, Analysis, sep=" "),
                        element = "plot.title", x =  0.05, hjust = 0, vjust = 1)
    
    
          Input     <- TPM[TPM$Geneid %in% dedup_ids$Geneid, ]
          print(paste(Tissue, "genes before TPM filter", tpm_value, ":", dim(Input)[1]))
          Input     <- Input[(rowSums(Input[,Samples] > tpm_value) >= 3),] #TPM Filter
          print(paste(Tissue, "genes after TPM filter", tpm_value, ":", dim(Input)[1]))
          rownames(Input) <- Input$Human_SYMBOL_Manual_unique
          
          table(rownames(Input) %in% Core)

          GeneHeatPlotRK_Clust_SL_tpm_core(Species = Species, Input = Input, 
                          Samples = Samples, SAMDF = SAMDF,   TITLE = TITLE, 
                          filename= FILENAME2, OutlineColor = "grey30") 
          
         hmap_list[[hmap_list_length+1]] <- hmap
         hmap_list[[hmap_list_length+2]] <- Input
         names(hmap_list)[[hmap_list_length+1]] <- paste("hmap", Tissue, Type, sep="_")
         names(hmap_list)[[hmap_list_length+2]] <- paste("Input", Tissue, Type, sep="_")
         
        }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "ddsLiver"
## [1] "Liver genes before TPM filter 1 : 2936"
## [1] "Liver genes after TPM filter 1 : 2775"
## [1] "ddsGill"
## [1] "Gill genes before TPM filter 1 : 4636"
## [1] "Gill genes after TPM filter 1 : 4520"

5.1 Cluster ORA

5.1.1 Extract Cluster
#############################  
#Genes from Extract Clusters#
#############################
Cluster       <- list()
for (resSet in names(DeseqLFC005)[grepl("dds", names(DeseqLFC005))]) {
  #https://github.com/kevinblighe/E-MTAB-6141
  tryCatch({
    
    a <- length(Cluster)
    
    Tissue      <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", resSet); print(resSet)
    Samples <-  if (Tissue == "Gill" ) {
      Samples <- Samples_RNA_Gill }else {
      Samples <- Samples_RNA_Liver }
    
    #Selection of  Sample file
    SAMDF <- if (Tissue == "Gill" ) {
    SAMDF <- SAMDF_RNA_Gill } else {SAMDF <- SAMDF_RNA_Liver}
    
    TPM  <- DeseqLFC005[[paste("tpm", Tissue, sep="")]]
    RES  <- DeseqLFC005[[paste("res", Tissue, sep="")]]
    Core <- DeseqLFC1[[paste("res", Tissue, sep="")]]

    Samples <-  SAMDF$SAMPLE
    
    hmap        <- hmap_list[[paste("hmap", Tissue, Type, sep="_")]]
    Input       <- hmap_list[[paste("Input", Tissue, Type, sep="_")]]
    Input       <- Input[,names(Input) %in% Samples]
    heat        <- t(scale(t(as.matrix(Input))))   #z-Score of vst data
    
    ##############################
    #Extract Cluster from Heatmap#
    ##############################
    hmap        <- draw(hmap)
    clrow       <- row_order(hmap)
    genecluster <- lapply(names(clrow), function(x){
         out  <- data.frame(GeneID = rownames(heat[clrow[[x]],]),
            clrow = paste0(x),stringsAsFactors = FALSE)
            return(out)}) %>%  
         do.call(rbind, .)
    
    Cluster1    <- genecluster[genecluster$clrow %in% c("Cluster 1"),]
    Cluster2    <- genecluster[genecluster$clrow %in% c("Cluster 2"),]
    Cluster3    <- genecluster[genecluster$clrow %in% c("Cluster 3"),]
    Cluster4    <- genecluster[genecluster$clrow %in% c("Cluster 4"),]

    Cluster1    <- heat[rownames(heat) %in% Cluster1$GeneID,]
    Cluster2    <- heat[rownames(heat) %in% Cluster2$GeneID,]
    Cluster3    <- heat[rownames(heat) %in% Cluster3$GeneID,]
    Cluster4    <- heat[rownames(heat) %in% Cluster4$GeneID,]

    Cluster[[a+1]] <- as.data.frame(Cluster1)
    Cluster[[a+2]] <- as.data.frame(Cluster2)
    Cluster[[a+3]] <- as.data.frame(Cluster3)
    Cluster[[a+4]] <- as.data.frame(Cluster4)

    names(Cluster)[[a+1]] <- paste("Cluster1-", sub("dds", "\\1", Tissue), sep="")
    names(Cluster)[[a+2]] <- paste("Cluster2-", sub("dds", "\\1", Tissue), sep="")
    names(Cluster)[[a+3]] <- paste("Cluster3-", sub("dds", "\\1", Tissue), sep="")
    names(Cluster)[[a+4]] <- paste("Cluster4-", sub("dds", "\\1", Tissue), sep="")
    
  },error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
  
}
## [1] "ddsLiver"

## [1] "ddsGill"

Clusters <- list("Cluster-Liver" = Cluster[names(Cluster)[grepl("Liver", names(Cluster))]], 
                 "Cluster-Gill" = Cluster[names(Cluster)[grepl("Gill", names(Cluster))]])

saveRDS(Clusters, file = paste0(file.path(path_Output_RNA,  paste(paste(paste(save_name, "DEG-Replicates-LFC005-Outlier-hmap-Cluster", sep="_"), paste("LFC", LFCThreshold, sep=""), Date, sep="_"), ".rds", sep=""))))
5.1.2 ORA KEGG
###############
#ORA Go & KEGG#
###############
OraKeggPlot_list <- list()
for (resSet in names(DeseqLFC005)[grepl("dds", names(DeseqLFC005))]) {
  #https://github.com/kevinblighe/E-MTAB-6141
  
    Tissue      <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", resSet); print(resSet)
    Samples <-  if (Tissue == "Gill" ) {
      Samples <- Samples_RNA_Gill }else {
      Samples <- Samples_RNA_Liver }
    
    #Selection of  Sample file
    SAMDF <- if (Tissue == "Gill" ) {
    SAMDF <- SAMDF_RNA_Gill } else {SAMDF <- SAMDF_RNA_Liver}
    
    TPM  <- DeseqLFC005[[paste("tpm", Tissue, sep="")]]
    RES  <- DeseqLFC005[[paste("res", Tissue, sep="")]]
    Core <- DeseqLFC1[[paste("res", Tissue, sep="")]]

    Samples     <-  SAMDF$SAMPLE
    CLUSTER     <- Clusters[[paste("Cluster", Tissue, sep="-")]]
    Input       <- hmap_list[[paste("Input", Tissue, Type, sep="_")]]
    
    
    for (Cluster in names(CLUSTER)) {
      tryCatch({
      OraKeggPlot_list_length <- length(OraKeggPlot_list)
        
      FILENAME      <- paste("Cluster_ORA", Cluster , Type, sep="_" )
      
      
      
      TITLE         <- paste(Cluster , Type, sep="_" )
      
      
      dedup_ids <- left_join(CLUSTER[[Cluster]] %>% 
                            mutate(Human_SYMBOL_Manual_unique = rownames(CLUSTER[[Cluster]])),
                             Input[-which(names(Input) %in% Samples)])
  
      WGCNAORAPlotRKwhite6SLUC_Publication(TPM = TPM, vst = vst, Species = Species, TPM_value = 1, 
                              Samples = Samples, SAMDF = SAMDF,  TITLE = TITLE, 
                              filename= FILENAME , dedup_ids = dedup_ids)
      
      }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
      OraKeggPlot_list[[OraKeggPlot_list_length+1]] <- KEGGUp
      names(OraKeggPlot_list)[[OraKeggPlot_list_length+1]] <- TITLE
    }
}
## [1] "ddsLiver"
## [1] "tpm"
## [1] 32916    22
## [1] 15590    22

## [1] "tpm"
## [1] 32916    22
## [1] 15590    22

## [1] "tpm"
## [1] 32916    22
## [1] 15590    22

## [1] "tpm"
## [1] 32916    22
## [1] 15590    22

## [1] "ddsGill"
## [1] "tpm"
## [1] 32916    22
## [1] 20571    22

## [1] "tpm"
## [1] 32916    22
## [1] 20571    22

## [1] "tpm"
## [1] 32916    22
## [1] 20571    22

## [1] "tpm"
## [1] 32916    22
## [1] 20571    22

saveRDS(OraKeggPlot_list, file = paste0(file.path(path_Output_RNA,  paste(paste(paste(save_name, "DEG-Replicates-LFC005-Outlier-hmap-Cluster-ORA", sep="_"), paste("LFC", LFCThreshold, sep=""), Date, sep="_"), ".rds", sep=""))))
5.1.3 Combined Plots
##################
#Combined Barplot#
##################
ClusterKEGGTop10_list <- list()
for (TISSUE in names(OraKeggPlot_list[grepl(paste("Cluster1"), names(OraKeggPlot_list))])) {
  
    a <- length(ClusterKEGGTop10_list)
     
    Tissue      <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", TISSUE); print(Tissue)

    FILENAME      <- paste("Cluster_ORA_KEGG", Tissue, sep="_" )
      
    ClusterKEGGs <- OraKeggPlot_list[grepl(paste(Tissue), names(OraKeggPlot_list))]

    ClusterKEGG <- do.call(rbind.data.frame, ClusterKEGGs)
    ClusterKEGG$Cluster <- sub("\\..*$", "", rownames(ClusterKEGG))
    ########################
    #Exclude Human Diseases# < mostly not informative 
    ########################
    ClusterKEGG <- ClusterKEGG[ClusterKEGG$category != "Human Diseases",]
    #Take only top 10 Pathways
    ClusterKEGGTop10<-  as.data.frame(ClusterKEGG  %>%  
                                      dplyr::group_by(Cluster) %>% 
                                      dplyr::arrange(p.adjust, desc(p.adjust)) %>%
                                      dplyr::slice_head(n = 10))
    ClusterKEGGTop10 <- na.omit(ClusterKEGGTop10)
    
    ClusterKEGGTop10_plot <- ggplot(ClusterKEGGTop10, aes(x = reorder(Description, -pvalue), 
                                                       y = Count, fill = -p.adjust)) +
    geom_bar(stat = "identity") +
    coord_flip() +
    scale_fill_continuous(low = "deepskyblue3", high = "#b11226") +
    labs(x = "", y = "", fill = "p.adjust") +
    theme(axis.text = element_text(size = 10)) +
    facet_wrap(~ Cluster, ncol = 1, scales = "free") +
    theme_minimal() + atheme + 
    theme(axis.title.x = element_blank()) #+
      #theme(plot.margin = unit(c(1,5,1,1),"cm")) #Top, Right, Bottom, Left.
    ggsave(ClusterKEGGTop10_plot, filename = paste(paste(save_name, Type, FILENAME, sep="_") ,".png", sep=""), path = pathPlots,
               device='png',        dpi=300, width = 6,height = 8)
    ClusterKEGGTop10_list[[a+1]] <- ClusterKEGGTop10_plot
    names(ClusterKEGGTop10_list)[[a+1]] <- Tissue
}
## [1] "Liver"
## [1] "Gill"
5.1.3.1 Cowplot
##################
#Combined cowplot#
##################
for (resSet in names(DeseqLFC005)[grepl("dds", names(DeseqLFC005))]) {
  require(grid)
  require(cowplot)
  tryCatch({
  #https://github.com/kevinblighe/E-MTAB-6141

    Tissue      <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", resSet); print(resSet)
    Samples <-  if (Tissue == "Gill" ) {
      Samples <- Samples_RNA_Gill }else {
      Samples <- Samples_RNA_Liver }
    
    #Selection of  Sample file
    SAMDF <- if (Tissue == "Gill" ) {
    SAMDF <- SAMDF_RNA_Gill } else {SAMDF <- SAMDF_RNA_Liver}
    
    TPM  <- DeseqLFC005[[paste("tpm", Tissue, sep="")]]
    RES  <- DeseqLFC005[[paste("res", Tissue, sep="")]]
    Core <- DeseqLFC1[[paste("res", Tissue, sep="")]]

    Samples     <-  SAMDF$SAMPLE
    Input       <-  as.data.frame(hmap_list[[paste("Input", Tissue, Type, sep="_")]])
    Input       <-  Input[names(Input) %in% Samples]
    heat        <-  t(scale(t(as.matrix(Input))))   #z-Score of vst data
    
    
    ClusterKEGGTop10 <-           ClusterKEGGTop10_list[[Tissue]]
    cowplot::plot_grid(grid.grabExpr(
    ComplexHeatmap::draw(hmap_list[[paste("hmap", Tissue, Type, sep="_")]], background = "transparent", 
                             heatmap_legend_side = 'left',
                             annotation_legend_side = 'bottom',
                             row_sub_title_side = 'left', 
                             column_title = paste(Species, Tissue, Type, 
      "Heatmap of", " [tpm-z-score ", dim(Input)[1], " genes/ ",     " tpm >", tpm_value," in 3]", sep = " "), 
                             column_title_gp = gpar(fontsize = 10))
    ), labels = c(""), ncol = 1) -> part_1

    
    cowplot::plot_grid(ClusterKEGGTop10, ncol = 1) -> part_2

    cowplot::plot_grid(part_1, part_2, labels = c("", "")) -> part_3
    plot(part_3)
    ggsave(part_3, filename = 
               paste(paste(save_name, Type, paste("CombinePlot_ORA_KEGG_Cluster_Heatmap",Tissue, sep="_"), sep="_") ,".png", sep=""), 
             path = pathPlots, device='png', dpi=300, 
           width = 15,
           height = 8)
    
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "ddsLiver"
## [1] "ddsGill"

#-